options(repos = c(CRAN = "https://cran.rstudio.com"))
packages <- c("readxl", "tidyverse", "reshape2", "psych", "corrplot", "randomForest",
"e1071", "caret", "neuralnet", "nnet", "NeuralSens","ipred",
"DataExplorer", "ggplot2", "FactoMineR", "factoextra", "devtools",
"ggbiplot", "pROC", "broom", "ape", "dendextend")
# readxl: for reading excel data file
# tidyverse: for general data utility functions
# psych: for pairs panel plot
# corrplot: for correlation matrix
# e1071: for Naive Bayes
# caret: for data preparation and evaluation of model
# neuralnet: for NN modeling
# nnet: for multinomial logistic regression
# ipred: for bagging
# randomForest: for stacked learner and variable importance
# DataExplorer and ggplot2: for plots
# FactoMineR, factoextra, devtools, ggbiplot: for PCA plots
# pROC: for ROC and AUC plot
# Install packages not yet installed
installed_packages <- packages %in% rownames(installed.packages())
if (any(installed_packages == FALSE)) {
install.packages(packages[!installed_packages])
}
# Packages loading
invisible(lapply(packages, library, character.only = TRUE))
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 1.0.1
## ✔ tibble 3.1.8 ✔ dplyr 1.1.0
## ✔ tidyr 1.3.0 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
##
## Attaching package: 'reshape2'
##
##
## The following object is masked from 'package:tidyr':
##
## smiths
##
##
##
## Attaching package: 'psych'
##
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
##
## corrplot 0.92 loaded
##
## randomForest 4.7-1.1
##
## Type rfNews() to see new features/changes/bug fixes.
##
##
## Attaching package: 'randomForest'
##
##
## The following object is masked from 'package:psych':
##
## outlier
##
##
## The following object is masked from 'package:dplyr':
##
## combine
##
##
## The following object is masked from 'package:ggplot2':
##
## margin
##
##
## Loading required package: lattice
##
##
## Attaching package: 'caret'
##
##
## The following object is masked from 'package:purrr':
##
## lift
##
##
##
## Attaching package: 'neuralnet'
##
##
## The following object is masked from 'package:dplyr':
##
## compute
##
##
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
##
## Loading required package: usethis
##
## Loading required package: plyr
##
## ------------------------------------------------------------------------------
##
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
##
## ------------------------------------------------------------------------------
##
##
## Attaching package: 'plyr'
##
##
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
##
##
## The following object is masked from 'package:purrr':
##
## compact
##
##
## Loading required package: scales
##
##
## Attaching package: 'scales'
##
##
## The following objects are masked from 'package:psych':
##
## alpha, rescale
##
##
## The following object is masked from 'package:purrr':
##
## discard
##
##
## The following object is masked from 'package:readr':
##
## col_factor
##
##
## Loading required package: grid
##
## Type 'citation("pROC")' for a citation.
##
##
## Attaching package: 'pROC'
##
##
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
##
##
##
## Attaching package: 'ape'
##
##
## The following object is masked from 'package:dplyr':
##
## where
##
##
##
## ---------------------
## Welcome to dendextend version 1.16.0
## Type citation('dendextend') for how to cite the package.
##
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
##
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## You may ask questions at stackoverflow, use the r and dendextend tags:
## https://stackoverflow.com/questions/tagged/dendextend
##
## To suppress this message use: suppressPackageStartupMessages(library(dendextend))
## ---------------------
##
##
##
## Attaching package: 'dendextend'
##
##
## The following objects are masked from 'package:ape':
##
## ladderize, rotate
##
##
## The following object is masked from 'package:stats':
##
## cutree
[1] Mouse ID
[2:78] Values of expression levels of 77 proteins; the names of proteins are followed by N indicating that they were measured in the nuclear fraction. For example: DYRK1A_n
[79] Genotype: control (c) or trisomy (t)
[80] Treatment type: memantine (m) or saline (s)
[81] Behavior: context-shock (CS) or shock-context (SC)
[82] Class: c-CS-s, c-CS-m, c-SC-s, c-SC-m, t-CS-s, t-CS-m, t-SC-s, t-SC-m
# define the URL -- you could dynamically build this
URL <- "https://archive.ics.uci.edu/ml/machine-learning-databases/00342/Data_Cortex_Nuclear.xls"
# download file
download.file(URL, destfile="Data_Cortex_Nuclear.xls")
# load the file
ncortex.raw <- as.data.frame(read_excel("Data_Cortex_Nuclear.xls"))
head(ncortex.raw)
dim(ncortex.raw)
## [1] 1080 82
str(ncortex.raw)
## 'data.frame': 1080 obs. of 82 variables:
## $ MouseID : chr "309_1" "309_2" "309_3" "309_4" ...
## $ DYRK1A_N : num 0.504 0.515 0.509 0.442 0.435 ...
## $ ITSN1_N : num 0.747 0.689 0.73 0.617 0.617 ...
## $ BDNF_N : num 0.43 0.412 0.418 0.359 0.359 ...
## $ NR1_N : num 2.82 2.79 2.69 2.47 2.37 ...
## $ NR2A_N : num 5.99 5.69 5.62 4.98 4.72 ...
## $ pAKT_N : num 0.219 0.212 0.209 0.223 0.213 ...
## $ pBRAF_N : num 0.178 0.173 0.176 0.176 0.174 ...
## $ pCAMKII_N : num 2.37 2.29 2.28 2.15 2.13 ...
## $ pCREB_N : num 0.232 0.227 0.23 0.207 0.192 ...
## $ pELK_N : num 1.75 1.6 1.56 1.6 1.5 ...
## $ pERK_N : num 0.688 0.695 0.677 0.583 0.551 ...
## $ pJNK_N : num 0.306 0.299 0.291 0.297 0.287 ...
## $ PKCA_N : num 0.403 0.386 0.381 0.377 0.364 ...
## $ pMEK_N : num 0.297 0.281 0.282 0.314 0.278 ...
## $ pNR1_N : num 1.022 0.957 1.004 0.875 0.865 ...
## $ pNR2A_N : num 0.606 0.588 0.602 0.52 0.508 ...
## $ pNR2B_N : num 1.88 1.73 1.73 1.57 1.48 ...
## $ pPKCAB_N : num 2.31 2.04 2.02 2.13 2.01 ...
## $ pRSK_N : num 0.442 0.445 0.468 0.478 0.483 ...
## $ AKT_N : num 0.859 0.835 0.814 0.728 0.688 ...
## $ BRAF_N : num 0.416 0.4 0.4 0.386 0.368 ...
## $ CAMKII_N : num 0.37 0.356 0.368 0.363 0.355 ...
## $ CREB_N : num 0.179 0.174 0.174 0.179 0.175 ...
## $ ELK_N : num 1.87 1.76 1.77 1.29 1.32 ...
## $ ERK_N : num 3.69 3.49 3.57 2.97 2.9 ...
## $ GSK3B_N : num 1.54 1.51 1.5 1.42 1.36 ...
## $ JNK_N : num 0.265 0.256 0.26 0.26 0.251 ...
## $ MEK_N : num 0.32 0.304 0.312 0.279 0.274 ...
## $ TRKA_N : num 0.814 0.781 0.785 0.734 0.703 ...
## $ RSK_N : num 0.166 0.157 0.161 0.162 0.155 ...
## $ APP_N : num 0.454 0.431 0.423 0.411 0.399 ...
## $ Bcatenin_N : num 3.04 2.92 2.94 2.5 2.46 ...
## $ SOD1_N : num 0.37 0.342 0.344 0.345 0.329 ...
## $ MTOR_N : num 0.459 0.424 0.425 0.429 0.409 ...
## $ P38_N : num 0.335 0.325 0.325 0.33 0.313 ...
## $ pMTOR_N : num 0.825 0.762 0.757 0.747 0.692 ...
## $ DSCR1_N : num 0.577 0.545 0.544 0.547 0.537 ...
## $ AMPKA_N : num 0.448 0.421 0.405 0.387 0.361 ...
## $ NR2B_N : num 0.586 0.545 0.553 0.548 0.513 ...
## $ pNUMB_N : num 0.395 0.368 0.364 0.367 0.352 ...
## $ RAPTOR_N : num 0.34 0.322 0.313 0.328 0.312 ...
## $ TIAM1_N : num 0.483 0.455 0.447 0.443 0.419 ...
## $ pP70S6_N : num 0.294 0.276 0.257 0.399 0.393 ...
## $ NUMB_N : num 0.182 0.182 0.184 0.162 0.16 ...
## $ P70S6_N : num 0.843 0.848 0.856 0.76 0.768 ...
## $ pGSK3B_N : num 0.193 0.195 0.201 0.184 0.186 ...
## $ pPKCG_N : num 1.44 1.44 1.52 1.61 1.65 ...
## $ CDK5_N : num 0.295 0.294 0.302 0.296 0.297 ...
## $ S6_N : num 0.355 0.355 0.386 0.291 0.309 ...
## $ ADARB1_N : num 1.34 1.31 1.28 1.2 1.21 ...
## $ AcetylH3K9_N : num 0.17 0.171 0.185 0.16 0.165 ...
## $ RRP1_N : num 0.159 0.158 0.149 0.166 0.161 ...
## $ BAX_N : num 0.189 0.185 0.191 0.185 0.188 ...
## $ ARC_N : num 0.106 0.107 0.108 0.103 0.105 ...
## $ ERBB4_N : num 0.145 0.15 0.145 0.141 0.142 ...
## $ nNOS_N : num 0.177 0.178 0.176 0.164 0.168 ...
## $ Tau_N : num 0.125 0.134 0.133 0.123 0.137 ...
## $ GFAP_N : num 0.115 0.118 0.118 0.117 0.116 ...
## $ GluR3_N : num 0.228 0.238 0.245 0.235 0.256 ...
## $ GluR4_N : num 0.143 0.142 0.142 0.145 0.141 ...
## $ IL1B_N : num 0.431 0.457 0.51 0.431 0.481 ...
## $ P3525_N : num 0.248 0.258 0.255 0.251 0.252 ...
## $ pCASP9_N : num 1.6 1.67 1.66 1.48 1.53 ...
## $ PSD95_N : num 2.01 2 2.02 1.96 2.01 ...
## $ SNCA_N : num 0.108 0.11 0.108 0.12 0.12 ...
## $ Ubiquitin_N : num 1.045 1.01 0.997 0.99 0.998 ...
## $ pGSK3B_Tyr216_N: num 0.832 0.849 0.847 0.833 0.879 ...
## $ SHH_N : num 0.189 0.2 0.194 0.192 0.206 ...
## $ BAD_N : num 0.123 0.117 0.119 0.133 0.13 ...
## $ BCL2_N : num NA NA NA NA NA NA NA NA NA NA ...
## $ pS6_N : num 0.106 0.107 0.108 0.103 0.105 ...
## $ pCFOS_N : num 0.108 0.104 0.106 0.111 0.111 ...
## $ SYP_N : num 0.427 0.442 0.436 0.392 0.434 ...
## $ H3AcK18_N : num 0.115 0.112 0.112 0.13 0.118 ...
## $ EGR1_N : num 0.132 0.135 0.133 0.147 0.14 ...
## $ H3MeK4_N : num 0.128 0.131 0.127 0.147 0.148 ...
## $ CaNA_N : num 1.68 1.74 1.93 1.7 1.84 ...
## $ Genotype : chr "Control" "Control" "Control" "Control" ...
## $ Treatment : chr "Memantine" "Memantine" "Memantine" "Memantine" ...
## $ Behavior : chr "C/S" "C/S" "C/S" "C/S" ...
## $ class : chr "c-CS-m" "c-CS-m" "c-CS-m" "c-CS-m" ...
c-CS-s: control mice, stimulated to learn, injected with saline (9 mice)
c-CS-m: control mice, stimulated to learn, injected with memantine (10 mice)
c-SC-s: control mice, not stimulated to learn, injected with saline (9 mice)
c-SC-m: control mice, not stimulated to learn, injected with memantine (10 mice)
t-CS-s: trisomy mice, stimulated to learn, injected with saline (7 mice)
t-CS-m: trisomy mice, stimulated to learn, injected with memantine (9 mice)
t-SC-s: trisomy mice, not stimulated to learn, injected with saline (9 mice)
t-SC-m: trisomy mice, not stimulated to learn, injected with memantine (9 mice)
ggplot(ncortex.raw, aes(class)) +
geom_bar(aes(fill = class), alpha = 0.8) +
theme_light() +
labs(title = "Count of each class")
# since we are focusing our analysis on the `class` variable, we are going to drop
# the rest of the categorical variables from the dataNC along with the `MouseID`column
ncortex <- subset(ncortex.raw, select = -c(MouseID, Behavior, Genotype, Treatment))
# We have the numeric features (77) and a multi-class target variable (1)
ncortex$class <- as.factor(ncortex.raw$class)
proteins <- names(ncortex[1:77]) #creating list of protein names
classes <- ncortex$class
Imputation of missing values
names(ncortex.raw) <- gsub("_N", "", names(ncortex.raw)) #removing _N in protein names
#check for missing values
ncortex.raw %>%
is.na %>%
melt %>%
ggplot(data = .,
aes(x = Var2,
y = Var1)) +
geom_raster(aes(fill = value)) +
scale_fill_grey(name = "",
labels = c("Present","Missing")) +
theme_minimal() +
theme(axis.text.x = element_text(angle=45, vjust=0.5)) +
labs(x = "Variables in Dataset",
y = "Rows / observations",
title = "Missing Values in Dataset")
# check the columns with NAs
featNA <- names(which(sapply(ncortex, anyNA)))
dataNA <- ncortex[, featNA]
# total number of NAs
sum(is.na(dataNA))
## [1] 1396
# check the distribution of NAs in each column
colSums(is.na(dataNA))
## DYRK1A_N ITSN1_N BDNF_N NR1_N NR2A_N pAKT_N pBRAF_N
## 3 3 3 3 3 3 3
## pCAMKII_N pCREB_N pELK_N pERK_N pJNK_N PKCA_N pMEK_N
## 3 3 3 3 3 3 3
## pNR1_N pNR2A_N pNR2B_N pPKCAB_N pRSK_N AKT_N BRAF_N
## 3 3 3 3 3 3 3
## CAMKII_N CREB_N ELK_N ERK_N GSK3B_N JNK_N MEK_N
## 3 3 18 3 3 3 7
## TRKA_N RSK_N APP_N Bcatenin_N SOD1_N MTOR_N P38_N
## 3 3 3 18 3 3 3
## pMTOR_N DSCR1_N AMPKA_N NR2B_N pNUMB_N RAPTOR_N TIAM1_N
## 3 3 3 3 3 3 3
## pP70S6_N BAD_N BCL2_N pCFOS_N H3AcK18_N EGR1_N H3MeK4_N
## 3 213 285 75 180 210 270
# all columns have NAs below 50% of the sample size. Hence, they can be imputed.
# computing mean of all columns using colMeans()
means <- colMeans(dataNA, na.rm = TRUE)
# replacing NA with mean value of each column
for(i in colnames(dataNA)){
dataNA[,i][is.na(dataNA[,i])] <- means[i]
}
# check missing values in our feature dataNC frame
sum(is.na(dataNA))
## [1] 0
# replace the imputed features with the original dataNC
ncortex[,featNA] <- dataNA
# get numeric feature
nc.feat <- ncortex[,-ncol(ncortex)]
# get target variables
nc.Class <- ncortex$class
# check basic statistics
summary(ncortex)
## DYRK1A_N ITSN1_N BDNF_N NR1_N
## Min. :0.1453 Min. :0.2454 Min. :0.1152 Min. :1.331
## 1st Qu.:0.2882 1st Qu.:0.4737 1st Qu.:0.2877 1st Qu.:2.059
## Median :0.3665 Median :0.5664 Median :0.3167 Median :2.297
## Mean :0.4258 Mean :0.6171 Mean :0.3191 Mean :2.297
## 3rd Qu.:0.4876 3rd Qu.:0.6975 3rd Qu.:0.3480 3rd Qu.:2.528
## Max. :2.5164 Max. :2.6027 Max. :0.4972 Max. :3.758
##
## NR2A_N pAKT_N pBRAF_N pCAMKII_N
## Min. :1.738 Min. :0.06324 Min. :0.06404 Min. :1.344
## 1st Qu.:3.160 1st Qu.:0.20582 1st Qu.:0.16462 1st Qu.:2.480
## Median :3.763 Median :0.23125 Median :0.18227 Median :3.330
## Mean :3.844 Mean :0.23317 Mean :0.18185 Mean :3.537
## 3rd Qu.:4.425 3rd Qu.:0.25722 3rd Qu.:0.19723 3rd Qu.:4.481
## Max. :8.483 Max. :0.53905 Max. :0.31707 Max. :7.464
##
## pCREB_N pELK_N pERK_N pJNK_N
## Min. :0.1128 Min. :0.429 Min. :0.1492 Min. :0.05211
## 1st Qu.:0.1908 1st Qu.:1.206 1st Qu.:0.3375 1st Qu.:0.28153
## Median :0.2107 Median :1.356 Median :0.4442 Median :0.32127
## Mean :0.2126 Mean :1.429 Mean :0.5459 Mean :0.31351
## 3rd Qu.:0.2346 3rd Qu.:1.561 3rd Qu.:0.6632 3rd Qu.:0.34869
## Max. :0.3062 Max. :6.113 Max. :3.5667 Max. :0.49343
##
## PKCA_N pMEK_N pNR1_N pNR2A_N
## Min. :0.1914 Min. :0.05682 Min. :0.5002 Min. :0.2813
## 1st Qu.:0.2818 1st Qu.:0.24429 1st Qu.:0.7436 1st Qu.:0.5913
## Median :0.3130 Median :0.27718 Median :0.8215 Median :0.7206
## Mean :0.3179 Mean :0.27503 Mean :0.8258 Mean :0.7269
## 3rd Qu.:0.3523 3rd Qu.:0.30335 3rd Qu.:0.8983 3rd Qu.:0.8473
## Max. :0.4740 Max. :0.45800 Max. :1.4082 Max. :1.4128
##
## pNR2B_N pPKCAB_N pRSK_N AKT_N
## Min. :0.3016 Min. :0.5678 Min. :0.09594 Min. :0.06442
## 1st Qu.:1.3813 1st Qu.:1.1686 1st Qu.:0.40415 1st Qu.:0.59732
## Median :1.5632 Median :1.3688 Median :0.44064 Median :0.68224
## Mean :1.5620 Mean :1.5253 Mean :0.44285 Mean :0.68224
## 3rd Qu.:1.7485 3rd Qu.:1.8812 3rd Qu.:0.48181 3rd Qu.:0.75891
## Max. :2.7240 Max. :3.0614 Max. :0.65096 Max. :1.18217
##
## BRAF_N CAMKII_N CREB_N ELK_N
## Min. :0.1439 Min. :0.2130 Min. :0.1136 Min. :0.4977
## 1st Qu.:0.2644 1st Qu.:0.3309 1st Qu.:0.1619 1st Qu.:0.9479
## Median :0.3267 Median :0.3605 Median :0.1796 Median :1.1028
## Mean :0.3785 Mean :0.3634 Mean :0.1805 Mean :1.1734
## 3rd Qu.:0.4127 3rd Qu.:0.3938 3rd Qu.:0.1957 3rd Qu.:1.3182
## Max. :2.1334 Max. :0.5862 Max. :0.3196 Max. :2.8029
##
## ERK_N GSK3B_N JNK_N MEK_N
## Min. :1.132 Min. :0.1511 Min. :0.0463 Min. :0.1472
## 1st Qu.:1.994 1st Qu.:1.0233 1st Qu.:0.2204 1st Qu.:0.2473
## Median :2.403 Median :1.1604 Median :0.2448 Median :0.2731
## Mean :2.474 Mean :1.1726 Mean :0.2416 Mean :0.2728
## 3rd Qu.:2.871 3rd Qu.:1.3094 3rd Qu.:0.2632 3rd Qu.:0.3005
## Max. :5.198 Max. :2.4758 Max. :0.3872 Max. :0.4154
##
## TRKA_N RSK_N APP_N Bcatenin_N
## Min. :0.1987 Min. :0.1074 Min. :0.2356 Min. :1.135
## 1st Qu.:0.6173 1st Qu.:0.1496 1st Qu.:0.3665 1st Qu.:1.830
## Median :0.7049 Median :0.1667 Median :0.4022 Median :2.127
## Mean :0.6932 Mean :0.1684 Mean :0.4048 Mean :2.147
## 3rd Qu.:0.7740 3rd Qu.:0.1845 3rd Qu.:0.4419 3rd Qu.:2.419
## Max. :1.0016 Max. :0.3051 Max. :0.6327 Max. :3.681
##
## SOD1_N MTOR_N P38_N pMTOR_N
## Min. :0.2171 Min. :0.2011 Min. :0.2279 Min. :0.1666
## 1st Qu.:0.3197 1st Qu.:0.4110 1st Qu.:0.3520 1st Qu.:0.6836
## Median :0.4460 Median :0.4525 Median :0.4080 Median :0.7597
## Mean :0.5426 Mean :0.4525 Mean :0.4153 Mean :0.7590
## 3rd Qu.:0.6953 3rd Qu.:0.4880 3rd Qu.:0.4662 3rd Qu.:0.8412
## Max. :1.8729 Max. :0.6767 Max. :0.9333 Max. :1.1249
##
## DSCR1_N AMPKA_N NR2B_N pNUMB_N
## Min. :0.1553 Min. :0.2264 Min. :0.1848 Min. :0.1856
## 1st Qu.:0.5312 1st Qu.:0.3267 1st Qu.:0.5151 1st Qu.:0.3128
## Median :0.5770 Median :0.3588 Median :0.5637 Median :0.3476
## Mean :0.5852 Mean :0.3684 Mean :0.5653 Mean :0.3571
## 3rd Qu.:0.6343 3rd Qu.:0.4007 3rd Qu.:0.6143 3rd Qu.:0.3926
## Max. :0.9164 Max. :0.7008 Max. :0.9720 Max. :0.6311
##
## RAPTOR_N TIAM1_N pP70S6_N NUMB_N
## Min. :0.1948 Min. :0.2378 Min. :0.1311 Min. :0.1180
## 1st Qu.:0.2762 1st Qu.:0.3721 1st Qu.:0.2821 1st Qu.:0.1593
## Median :0.3050 Median :0.4074 Median :0.3787 Median :0.1782
## Mean :0.3158 Mean :0.4186 Mean :0.3945 Mean :0.1811
## 3rd Qu.:0.3473 3rd Qu.:0.4559 3rd Qu.:0.4807 3rd Qu.:0.1972
## Max. :0.5267 Max. :0.7221 Max. :1.1292 Max. :0.3166
##
## P70S6_N pGSK3B_N pPKCG_N CDK5_N
## Min. :0.3441 Min. :0.09998 Min. :0.5988 Min. :0.1812
## 1st Qu.:0.8267 1st Qu.:0.14925 1st Qu.:1.2968 1st Qu.:0.2726
## Median :0.9313 Median :0.16021 Median :1.6646 Median :0.2938
## Mean :0.9431 Mean :0.16121 Mean :1.7066 Mean :0.2924
## 3rd Qu.:1.0451 3rd Qu.:0.17174 3rd Qu.:2.1130 3rd Qu.:0.3125
## Max. :1.6800 Max. :0.25321 Max. :3.3820 Max. :0.8174
##
## S6_N ADARB1_N AcetylH3K9_N RRP1_N
## Min. :0.1302 Min. :0.5291 Min. :0.05253 Min. :-0.06201
## 1st Qu.:0.3167 1st Qu.:0.9305 1st Qu.:0.10357 1st Qu.: 0.14902
## Median :0.4010 Median :1.1283 Median :0.15042 Median : 0.16210
## Mean :0.4292 Mean :1.1974 Mean :0.21648 Mean : 0.16663
## 3rd Qu.:0.5349 3rd Qu.:1.3802 3rd Qu.:0.26965 3rd Qu.: 0.17741
## Max. :0.8226 Max. :2.5399 Max. :1.45939 Max. : 0.61238
##
## BAX_N ARC_N ERBB4_N nNOS_N
## Min. :0.07233 Min. :0.06725 Min. :0.1002 Min. :0.09973
## 1st Qu.:0.16817 1st Qu.:0.11084 1st Qu.:0.1470 1st Qu.:0.16645
## Median :0.18074 Median :0.12163 Median :0.1564 Median :0.18267
## Mean :0.17931 Mean :0.12152 Mean :0.1565 Mean :0.18130
## 3rd Qu.:0.19158 3rd Qu.:0.13196 3rd Qu.:0.1654 3rd Qu.:0.19857
## Max. :0.24114 Max. :0.15875 Max. :0.2087 Max. :0.26074
##
## Tau_N GFAP_N GluR3_N GluR4_N
## Min. :0.09623 Min. :0.08611 Min. :0.1114 Min. :0.07258
## 1st Qu.:0.16799 1st Qu.:0.11277 1st Qu.:0.1957 1st Qu.:0.10889
## Median :0.18863 Median :0.12046 Median :0.2169 Median :0.12355
## Mean :0.21049 Mean :0.12089 Mean :0.2219 Mean :0.12656
## 3rd Qu.:0.23394 3rd Qu.:0.12772 3rd Qu.:0.2460 3rd Qu.:0.14195
## Max. :0.60277 Max. :0.21362 Max. :0.3310 Max. :0.53700
##
## IL1B_N P3525_N pCASP9_N PSD95_N
## Min. :0.2840 Min. :0.2074 Min. :0.8532 Min. :1.206
## 1st Qu.:0.4756 1st Qu.:0.2701 1st Qu.:1.3756 1st Qu.:2.079
## Median :0.5267 Median :0.2906 Median :1.5227 Median :2.242
## Mean :0.5273 Mean :0.2913 Mean :1.5483 Mean :2.235
## 3rd Qu.:0.5770 3rd Qu.:0.3116 3rd Qu.:1.7131 3rd Qu.:2.420
## Max. :0.8897 Max. :0.4437 Max. :2.5862 Max. :2.878
##
## SNCA_N Ubiquitin_N pGSK3B_Tyr216_N SHH_N
## Min. :0.1012 Min. :0.7507 Min. :0.5774 Min. :0.1559
## 1st Qu.:0.1428 1st Qu.:1.1163 1st Qu.:0.7937 1st Qu.:0.2064
## Median :0.1575 Median :1.2366 Median :0.8499 Median :0.2240
## Mean :0.1598 Mean :1.2393 Mean :0.8488 Mean :0.2267
## 3rd Qu.:0.1733 3rd Qu.:1.3631 3rd Qu.:0.9162 3rd Qu.:0.2417
## Max. :0.2576 Max. :1.8972 Max. :1.2046 Max. :0.3583
##
## BAD_N BCL2_N pS6_N pCFOS_N
## Min. :0.0883 Min. :0.08066 Min. :0.06725 Min. :0.08542
## 1st Qu.:0.1410 1st Qu.:0.11999 1st Qu.:0.11084 1st Qu.:0.11437
## Median :0.1579 Median :0.13476 Median :0.12163 Median :0.12847
## Mean :0.1579 Mean :0.13476 Mean :0.12152 Mean :0.13105
## 3rd Qu.:0.1676 3rd Qu.:0.13933 3rd Qu.:0.13196 3rd Qu.:0.14243
## Max. :0.2820 Max. :0.26151 Max. :0.15875 Max. :0.25653
##
## SYP_N H3AcK18_N EGR1_N H3MeK4_N
## Min. :0.2586 Min. :0.07969 Min. :0.1055 Min. :0.1018
## 1st Qu.:0.3981 1st Qu.:0.13397 1st Qu.:0.1592 1st Qu.:0.1743
## Median :0.4485 Median :0.16961 Median :0.1831 Median :0.2054
## Mean :0.4461 Mean :0.16961 Mean :0.1831 Mean :0.2054
## 3rd Qu.:0.4908 3rd Qu.:0.18717 3rd Qu.:0.1961 3rd Qu.:0.2192
## Max. :0.7596 Max. :0.47976 Max. :0.3607 Max. :0.4139
##
## CaNA_N class
## Min. :0.5865 c-CS-m :150
## 1st Qu.:1.0814 c-SC-m :150
## Median :1.3174 c-CS-s :135
## Mean :1.3378 c-SC-s :135
## 3rd Qu.:1.5858 t-CS-m :135
## Max. :2.1298 t-SC-m :135
## (Other):240
for(i in 1:length(proteins)){
plot <- ggplot(ncortex, aes(eval(parse(text = proteins[i])))) +
geom_histogram(aes(fill=..count..), color = "black", alpha = 0.5) +
scale_fill_gradient("Count", low="green", high="red") +
labs(title = proteins[i],
x = "Expression Level",
y = "Count") +
theme_light()
cat("#### ", proteins[i], "\n")
print(plot)
cat('\n\n')
}
ggplot(ncortex, aes(x = eval(parse(text = proteins[1])))) +
geom_histogram(aes(fill=..count..), color = "black", alpha = 0.5) +
scale_fill_gradient("Count", low="green", high="red") +
labs(title = proteins[1],
x = "Expression Level",
y = "Count")
for(i in 1:length(proteins)){
plot <- ggplot(ncortex, aes(x = class, y = eval(parse(text = proteins[i])))) +
geom_boxplot(aes(fill = class), alpha = 0.7) +
stat_summary(fun.y=mean, colour="darkred", geom="point", size=2) +
labs(y = proteins[i]) +
theme_light()
cat("#### ", proteins[i], "\n")
print(plot)
cat('\n\n')
}
Normalization
For normalization, we use min-max scaling from preProcess() and validate it using Shapiro-Wilk test.
# Min-max scaling (range 0-1 for each column)
nc.feat <- ncortex[1:77]
min_max_scaling <- function(data) {
pp <- preProcess(data.frame(data), method=c("range"))
scaled.nc <- predict(pp, as.data.frame(data))
return(scaled.nc)
}
scaled_ncortex <- min_max_scaling(data=nc.feat)
# check normality distribution using Shapiro-wilk test
testNormality <- function(data) {
norm.res <- data.frame(do.call(
cbind, lapply(data, function(x) shapiro.test(x)["p.value"])))
unnorm.col <- list(which(norm.res > 0.05))
print(paste("The unnormalized column(s) present in the data are", unnorm.col))
return(unlist(unnorm.col))
}
# check our scaled data for any unnormalized column(s)
colX <- testNormality(data = scaled_ncortex)
[1] “The unnormalized column(s) present in the data are c(4, 9, 28)”
# check the distribution curves for the above columns
pairs.panels(scaled_ncortex[,colX])
After plotting the distribution curves of the above columns, we observe that they are fairly normal and do not require further transformation.
##########################################
########## Feature Engineering ###########
##########################################
# All columns are normally distributed
norm_ncortex <- scaled_ncortex
norm_ncortex$Class <- nc.Class
head(norm_ncortex)
dim(norm_ncortex)
## [1] 1080 78
## Method-1: Correlation Cut-off
rawData <- scaled_ncortex
# compute the correlation matrix
corMatNC1 <- cor(rawData)
# visualize the matrix, clustering features by correlation index
corrplot(corMatNC1, order = "hclust")
# After inspecting the matrix, we set the correlation threshold at 0.75
# Apply correlation filter at 0.75
highlyCor <- findCorrelation(corMatNC1, 0.75)
#then we remove all the variable correlated with more 0.75.
reduced_data <- rawData[,-highlyCor]
# check the effect of correlation cut-off filter
corMatNC2 <- cor(reduced_data)
corrplot(corMatNC2, order = "hclust")
For correlation method, we use an ideal factor of 0.75 coefficient value as the threshold. So variables having inter-correlation with more than 0.75 value are dropped.
## Method -2: Variable Importance
feat <- reduced_data
feat$Class <- nc.Class
# training `randomForest` to calculate feature importance
rf <- randomForest(Class ~., data = feat)
var_imp <- varImp(rf, scale = FALSE)
# sort the score in decreasing order
var_imp_df <- data.frame(cbind(variable = rownames(var_imp), score = var_imp[,1]))
var_imp_df$score <- as.double(var_imp_df$score)
rf_scores <- var_imp_df[order(var_imp_df$score,decreasing = T),]
# setting up filter threshold --from the figure plotted down below
rf.filter <- sum(round(rf_scores$score) > 15)
# extracting data from filter
rf.feat <- ncortex[,rf_scores$variable[1:rf.filter]]
rf.feat$Class <- ncortex$class
# plotting the rf_scores to determine threshold
ggplot(rf_scores, aes(x=reorder(variable, score), y=score)) +
geom_point() +
geom_segment(aes(x=variable,xend=variable,y=0,yend=score)) +
ggtitle("RF-feature plot") +
ylab("IncNodePurity") +
xlab("Variable Name") +
coord_flip()
From the Rf-feature plot, we observe a constant downward trend from variable 15. Hence, we choose the threshold of “15” to filter our final set of features.
We perform PCA on our filtered data before we create feature subsets based on learning outcomes signified by our target classes.
##########################################
################ PCA ####################
##########################################
# initialize data for PCA
dataPCA <- rf.feat
# dim(dataPCA)
# colnames(dataPCA)
## feature subset
# normal and failed learning (subset-1)
NL.data1 <- dataPCA %>% filter(Class == 'c-CS-s')
FL.data <- dataPCA %>% filter(Class == 't-CS-s')
# normal and rescued learning (subset-2)
NL.data2 <- dataPCA %>% filter(Class == 'c-CS-m')
RL.data <- dataPCA %>% filter(Class == 't-CS-m')
# feature subset
feat_data1 <- rbind(NL.data1, FL.data)
dim(feat_data1)
## [1] 240 22
feat_data2 <- rbind(NL.data2, RL.data)
dim(feat_data2)
## [1] 285 22
# PCA with function PCA
pca1 <- PCA(feat_data1[,-ncol(feat_data1)], scale.unit=F, graph=F)
pca2 <- PCA(feat_data2[,-ncol(feat_data2)], scale.unit=F, graph=F)
# custom function to analyze PCA results
pcaEval <- function(pca){
scr.pca <- fviz_eig(pca, addlabels = T, ylim = c(0, 100))
p.pca <- plot(pca, choix = "var", shadow = TRUE, select = "cos2")
return(list(scr.pca, p.pca))
}
# biplot --to check distribution of observation among the two classes
PCA.plot <- function(pca, fdata) {
ggbiplot(pca,
groups = fdata$Class,
ellipse = TRUE,
circle = TRUE,
ellipse.prob = 0.7) +
scale_color_discrete(name = '')+
theme(legend.direction = 'horizontal', legend.position = 'top')
}
# check pca results for feature subset-1
PCA.plot(pca1, feat_data1) # biplot
From the above plot, we observe from pca1 analysis on feature subset-1 that there are few features that provide strong variance for the given learning outcomes such as
pCAMKII_N, pPKCG_N, pPKCAB_N.
feval1 <- pcaEval(pca= pca1)
feval1[1] # scree plot
## [[1]]
Here, from the scree plot, we notice that the first two PCs approximately describe 75% of variance in the subset-1.
feval1[2] # plot of important variables
## [[1]]
## Warning: ggrepel: 18 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# check pca results for feature subset-2
PCA.plot(pca2, feat_data2) # biplot
From the biplot of subset-2, we get to understand that are similar features showing strong variance for different set of learning outcomes too.
feval2 <- pcaEval(pca= pca2)
feval2[1] # scree plot
## [[1]]
From the second scree plot, we observe an explosive amount of variance from the first two PCs ~85%.
feval2[2] # plot of important variables
## [[1]]
## Warning: ggrepel: 16 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
The graph of variables are now very distinct for subset-2 with some handful features highlighted that may involve strongly for producing the respective learning outcomes.
##########################################
####### Train-Valid-Test splitting #######
##########################################
dataNC <- rf.feat
head(rf.feat)
# Simple into 3 sets
## --train: 60%
## --valid: 20%
## --test: 20%
set.seed(100)
splits = c(train = 0.6, test = 0.2, valid = 0.2)
grp = sample(cut(seq(nrow(dataNC)), nrow(dataNC)*cumsum(c(0,splits)),
labels = names(splits)))
# get the data splitted in a list
res = split(dataNC, grp)
# train set
trainX <- res$train
dim(trainX)
## [1] 648 22
# valid set
validX <- res$valid
dim(validX)
## [1] 216 22
# valid set
testX <- res$test
dim(testX)
## [1] 216 22
Custom formula
# formula for training
form <- paste(names(trainX)[1:ncol(trainX)-1], collapse = " + ")
formula.NC <- formula(paste(names(trainX)[ncol(trainX)], form, sep = " ~ "))
formula.NC # check
## Class ~ SOD1_N + pPKCG_N + CaNA_N + APP_N + pPKCAB_N + Ubiquitin_N +
## pCAMKII_N + pS6_N + P38_N + S6_N + pNUMB_N + pP70S6_N + AcetylH3K9_N +
## pGSK3B_N + AKT_N + ADARB1_N + IL1B_N + nNOS_N + pNR2A_N +
## H3AcK18_N + pELK_N
Naive Bayes
##########################################
############ Base Modeling ##############
##########################################
##Naive Bayes
# fit the model
nb.model <- e1071::naiveBayes(formula.NC, trainX)
# make predictions using valid set
nb.pred <- predict(nb.model, validX, type="class")
# check reference and predicted classification
nb.cm.valid <- confusionMatrix(nb.pred, validX$Class)
nb.cm.valid$table
## Reference
## Prediction c-CS-m c-CS-s c-SC-m c-SC-s t-CS-m t-CS-s t-SC-m t-SC-s
## c-CS-m 19 6 0 0 0 3 0 0
## c-CS-s 5 18 0 0 5 0 0 0
## c-SC-m 0 0 31 0 0 0 3 0
## c-SC-s 0 0 0 13 0 0 1 0
## t-CS-m 10 0 0 0 27 3 0 0
## t-CS-s 0 2 0 0 2 17 0 0
## t-SC-m 0 0 0 0 0 0 21 0
## t-SC-s 0 0 0 5 0 0 0 25
nb.cm.valid$overall[1:2]
## Accuracy Kappa
## 0.7916667 0.7603078
Here, from our base ’NB’ model, we observe there are several high chunks of misclassification. Hence, we perform a typical k-fold cross-validation (k=5) to improve the classification of the model.
# k-fold cross validation (k=5)
set.seed(213)
NB.tune <- data.frame(fL=c(0,0.5,1.0), usekernel = TRUE, adjust=c(0,0.5,1.0))
# model training with tuned hyper-parameters
NB.model <- caret::train(formula.NC,trainX,'nb',
trControl=trainControl(method= 'cv',number= 5),
tuneGrid = NB.tune)
## Warning: model fit failed for Fold1: fL=0.0, usekernel=TRUE, adjust=0.0 Error in density.default(xx, ...) : 'bw' is not positive.
## Warning: model fit failed for Fold2: fL=0.0, usekernel=TRUE, adjust=0.0 Error in density.default(xx, ...) : 'bw' is not positive.
## Warning: model fit failed for Fold3: fL=0.0, usekernel=TRUE, adjust=0.0 Error in density.default(xx, ...) : 'bw' is not positive.
## Warning: model fit failed for Fold4: fL=0.0, usekernel=TRUE, adjust=0.0 Error in density.default(xx, ...) : 'bw' is not positive.
## Warning: model fit failed for Fold5: fL=0.0, usekernel=TRUE, adjust=0.0 Error in density.default(xx, ...) : 'bw' is not positive.
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
# make predictions using valid set
NB.pred <- predict(NB.model, validX)
NB.cm.valid <- confusionMatrix(NB.pred, validX$Class)
NB.cm.valid$table
## Reference
## Prediction c-CS-m c-CS-s c-SC-m c-SC-s t-CS-m t-CS-s t-SC-m t-SC-s
## c-CS-m 20 2 0 0 2 3 0 0
## c-CS-s 8 23 0 0 5 0 0 0
## c-SC-m 0 0 28 0 0 0 1 0
## c-SC-s 0 0 3 13 0 0 1 0
## t-CS-m 6 1 0 0 26 1 0 0
## t-CS-s 0 0 0 0 1 19 0 0
## t-SC-m 0 0 0 0 0 0 23 0
## t-SC-s 0 0 0 5 0 0 0 25
NB.cm.valid$overall[1:2]
## Accuracy Kappa
## 0.8194444 0.7927981
Artificial Neural Netwok
Here, I have set the argument “linear.output” to FALSEin order to tell the model that I want to apply the activation function and that I am not doing a regression task. Then I set the activation function to logistic (which by the way is the default option) in order to apply the logistic function.As far as the number of hidden neurons and layers, I tried some combinations and the one used seemed to perform slightly better than the others (around 1% of accuracy difference in cross-validation score.
set.seed(334)
# fit the model
NN.model = neuralnet::neuralnet(formula.NC, trainX,
hidden=c(14, 10, 8), linear.output = FALSE,
act.fct ="logistic")
# plot the neural network
plot(NN.model)
tablePred.NN <- function(model, data){
# make predictions using valid set
raw.pred <- data.frame(neuralnet::compute(model,
data.frame(data[,-ncol(data)]))$net.result)
# initialize target labels
labels <- c("c-CS-m", "c-CS-s", "c-SC-m", "c-SC-s",
"t-CS-m", "t-CS-s", "t-SC-m", "t-SC-s")
pred.label <- data.frame(max.col(raw.pred)) %>%
mutate(prediction=labels[max.col.raw.pred.])
# make predictions using valid set
preds <- as.factor(pred.label[,2])
confMat <- confusionMatrix(validX$Class, preds, mode="prec_recall")
return(list(preds,confMat))
}
NN.cm.valid <- tablePred.NN(NN.model, validX)
NN.cm.valid[2]
## [[1]]
## Confusion Matrix and Statistics
##
## Reference
## Prediction c-CS-m c-CS-s c-SC-m c-SC-s t-CS-m t-CS-s t-SC-m t-SC-s
## c-CS-m 30 4 0 0 0 0 0 0
## c-CS-s 0 26 0 0 0 0 0 0
## c-SC-m 0 0 31 0 0 0 0 0
## c-SC-s 0 1 0 11 0 0 2 4
## t-CS-m 0 2 0 0 30 2 0 0
## t-CS-s 2 0 0 0 0 21 0 0
## t-SC-m 0 0 0 0 0 0 25 0
## t-SC-s 0 0 0 0 0 0 0 25
##
## Overall Statistics
##
## Accuracy : 0.9213
## 95% CI : (0.877, 0.9535)
## No Information Rate : 0.1528
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9096
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: c-CS-m Class: c-CS-s Class: c-SC-m Class: c-SC-s
## Precision 0.8824 1.0000 1.0000 0.61111
## Recall 0.9375 0.7879 1.0000 1.00000
## F1 0.9091 0.8814 1.0000 0.75862
## Prevalence 0.1481 0.1528 0.1435 0.05093
## Detection Rate 0.1389 0.1204 0.1435 0.05093
## Detection Prevalence 0.1574 0.1204 0.1435 0.08333
## Balanced Accuracy 0.9579 0.8939 1.0000 0.98293
## Class: t-CS-m Class: t-CS-s Class: t-SC-m Class: t-SC-s
## Precision 0.8824 0.91304 1.0000 1.0000
## Recall 1.0000 0.91304 0.9259 0.8621
## F1 0.9375 0.91304 0.9615 0.9259
## Prevalence 0.1389 0.10648 0.1250 0.1343
## Detection Rate 0.1389 0.09722 0.1157 0.1157
## Detection Prevalence 0.1574 0.10648 0.1157 0.1157
## Balanced Accuracy 0.9892 0.95134 0.9630 0.9310
Multinomial Logistic Regression
For our third model, we use Multinomial logistic regression from nnet() package since it is highly preferred if you want to perform multi-class classification using logistic regression.
set.seed(232)
# fit the model
MGR.model <- nnet::multinom(formula.NC, data = trainX, trace = F)
# make predictions using valid set
MGR.pred <- predict(MGR.model, newdata=validX)
# check reference and predicted classification
MGR.cm.valid <- confusionMatrix(validX$Class, MGR.pred)
MGR.cm.valid$table
## Reference
## Prediction c-CS-m c-CS-s c-SC-m c-SC-s t-CS-m t-CS-s t-SC-m t-SC-s
## c-CS-m 26 7 0 0 0 0 1 0
## c-CS-s 2 22 0 0 2 0 0 0
## c-SC-m 0 0 31 0 0 0 0 0
## c-SC-s 0 0 1 17 0 0 0 0
## t-CS-m 0 2 0 0 32 0 0 0
## t-CS-s 0 0 0 0 0 23 0 0
## t-SC-m 0 0 0 1 0 0 24 0
## t-SC-s 0 0 0 0 0 0 0 25
MGR.cm.valid$overall[1:2]
## Accuracy Kappa
## 0.9259259 0.9149753
Test performance of Base learners
# evaluate base learners test performances
# model1
NB.pred.test <- predict(NB.model, testX)
NB.cm.test <- confusionMatrix(NB.pred.test, testX$Class, mode="prec_recall")
NB.cm.test$table
## Reference
## Prediction c-CS-m c-CS-s c-SC-m c-SC-s t-CS-m t-CS-s t-SC-m t-SC-s
## c-CS-m 27 1 0 0 3 4 0 0
## c-CS-s 6 16 0 1 6 0 0 0
## c-SC-m 0 0 27 0 0 0 0 0
## c-SC-s 0 0 3 23 0 0 0 0
## t-CS-m 3 3 0 0 21 2 0 0
## t-CS-s 0 3 0 0 1 14 0 0
## t-SC-m 0 0 0 4 0 0 22 0
## t-SC-s 1 0 0 0 0 0 0 25
# model2
NN.tablePred <- tablePred.NN(NN.model, testX)
NN.cm.test <- NN.tablePred[2]
NN.cm.test
## [[1]]
## Confusion Matrix and Statistics
##
## Reference
## Prediction c-CS-m c-CS-s c-SC-m c-SC-s t-CS-m t-CS-s t-SC-m t-SC-s
## c-CS-m 33 0 0 0 1 0 0 0
## c-CS-s 1 22 2 0 0 0 1 0
## c-SC-m 3 0 26 0 0 0 2 0
## c-SC-s 1 0 0 17 0 0 0 0
## t-CS-m 0 1 2 3 23 2 3 0
## t-CS-s 2 0 0 0 0 18 3 0
## t-SC-m 0 1 1 0 5 0 18 0
## t-SC-s 0 0 1 0 1 0 0 23
##
## Overall Statistics
##
## Accuracy : 0.8333
## 95% CI : (0.7768, 0.8805)
## No Information Rate : 0.1852
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.8084
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: c-CS-m Class: c-CS-s Class: c-SC-m Class: c-SC-s
## Precision 0.9706 0.8462 0.8387 0.94444
## Recall 0.8250 0.9167 0.8125 0.85000
## F1 0.8919 0.8800 0.8254 0.89474
## Prevalence 0.1852 0.1111 0.1481 0.09259
## Detection Rate 0.1528 0.1019 0.1204 0.07870
## Detection Prevalence 0.1574 0.1204 0.1435 0.08333
## Balanced Accuracy 0.9097 0.9479 0.8927 0.92245
## Class: t-CS-m Class: t-CS-s Class: t-SC-m Class: t-SC-s
## Precision 0.6765 0.78261 0.72000 0.9200
## Recall 0.7667 0.90000 0.66667 1.0000
## F1 0.7188 0.83721 0.69231 0.9583
## Prevalence 0.1389 0.09259 0.12500 0.1065
## Detection Rate 0.1065 0.08333 0.08333 0.1065
## Detection Prevalence 0.1574 0.10648 0.11574 0.1157
## Balanced Accuracy 0.8538 0.93724 0.81481 0.9948
# model3
MGR.pred.test <- predict(MGR.model, testX)
MGR.cm.test <- confusionMatrix(MGR.pred.test, testX$Class)
MGR.cm.test$table
## Reference
## Prediction c-CS-m c-CS-s c-SC-m c-SC-s t-CS-m t-CS-s t-SC-m t-SC-s
## c-CS-m 29 1 0 0 2 1 0 0
## c-CS-s 2 19 0 0 2 0 0 0
## c-SC-m 0 0 28 1 0 0 1 0
## c-SC-s 0 1 0 26 0 0 0 0
## t-CS-m 3 1 0 0 27 0 0 0
## t-CS-s 1 1 0 0 0 19 0 0
## t-SC-m 2 0 2 1 0 0 21 0
## t-SC-s 0 0 0 0 0 0 0 25
## In-built Ensemble Model --bagging
# fit the model
bag.model <- bagging(formula.NC, trainX)
# estimate predictions on test data
bag.pred <- predict(bag.model, testX)
# get the model performance
bag.cm.test <- confusionMatrix(as.factor(bag.pred), testX$Class)
bag.cm.test$table
## Reference
## Prediction c-CS-m c-CS-s c-SC-m c-SC-s t-CS-m t-CS-s t-SC-m t-SC-s
## c-CS-m 35 1 0 0 0 1 0 0
## c-CS-s 0 22 0 0 1 2 0 0
## c-SC-m 0 0 28 0 0 0 0 1
## c-SC-s 0 0 2 27 0 0 0 0
## t-CS-m 1 0 0 0 30 1 0 0
## t-CS-s 0 0 0 0 0 16 0 0
## t-SC-m 1 0 0 1 0 0 22 0
## t-SC-s 0 0 0 0 0 0 0 24
Here, we create a stacked ensemble learner to check whether we can improve the classification performance for analysis even more. Here, we first generate raw/prob prediction values of our three base models on valid data and use it as level-one dataset for training ensemble learner using “rf” (as decision tree based models always perform quite well for classification). After fitting our model on level-one dataset, we generate raw test predictions from base learner as our test dataset for predicting values from our super learner.
# Custom Stacked Ensemble learner
stackLearner <- function(model1, model2, model3, testData, validData){
## --NB.model: model1 (Naive Bayes)
## --NN.model: model2 (Artificial Neural Network)
## --MGR.model: model3 (Multinomial Logistic regression)
# raw predictions from the base models
validpred1 <- predict(model1, validData, type= "raw")
validpred2 <- predict(model2, validData, type= "raw")
colnames(validpred2) <- colnames(validpred1)
validpred3 <- predict(model3, validData, type= "probs")
# Generate level-one dataset for training the ensemble super-learner
basePred <- data.frame(validpred1, validpred2, validpred3, Class= validData$Class,
stringsAsFactors = F)
# Train the ensemble
set.seed(270)
modelStack <- train(Class~., data = basePred, method = "rf")
# Generate predictions on the test set
testPred1 <- predict(model1, newdata = testData,type= "raw")
testPred2 <- predict(model2, newdata = testData, type= "raw")
testPred3 <- predict(model3, newdata = testData,type= "probs")
# Using the base learner test set predictions,
# create the level-one dataset to feed to the ensemble
testPredLevelOne <- data.frame(testPred1, testPred2, testPred3,
Class= testX$Class, stringsAsFactors = F)
# estimate predictions over tested base models
colnames(testPredLevelOne) <- colnames(basePred)
combPred <- predict(modelStack, testPredLevelOne)
# Evaluate ensemble test performance
confMatrix <- confusionMatrix(combPred, testData$Class, mode="prec_recall")
return(list(confMatrix, combPred))
}
# func call
stackResults <- stackLearner(model1 = NB.model,
model2 = NN.model,
model3 = MGR.model,
testData = testX, validData = validX)
# Ensemble prediction matrix
ENS.confMat <- stackResults[1]
ENS.confMat
## [[1]]
## Confusion Matrix and Statistics
##
## Reference
## Prediction c-CS-m c-CS-s c-SC-m c-SC-s t-CS-m t-CS-s t-SC-m t-SC-s
## c-CS-m 35 1 0 0 0 1 0 0
## c-CS-s 0 22 0 0 1 0 0 0
## c-SC-m 0 0 29 1 0 0 0 0
## c-SC-s 0 0 0 26 0 0 0 0
## t-CS-m 2 0 0 0 30 0 0 0
## t-CS-s 0 0 0 0 0 19 0 0
## t-SC-m 0 0 1 1 0 0 22 0
## t-SC-s 0 0 0 0 0 0 0 25
##
## Overall Statistics
##
## Accuracy : 0.963
## 95% CI : (0.9283, 0.9839)
## No Information Rate : 0.1713
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9574
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: c-CS-m Class: c-CS-s Class: c-SC-m Class: c-SC-s
## Precision 0.9459 0.9565 0.9667 1.0000
## Recall 0.9459 0.9565 0.9667 0.9286
## F1 0.9459 0.9565 0.9667 0.9630
## Prevalence 0.1713 0.1065 0.1389 0.1296
## Detection Rate 0.1620 0.1019 0.1343 0.1204
## Detection Prevalence 0.1713 0.1065 0.1389 0.1204
## Balanced Accuracy 0.9674 0.9757 0.9806 0.9643
## Class: t-CS-m Class: t-CS-s Class: t-SC-m Class: t-SC-s
## Precision 0.9375 1.00000 0.9167 1.0000
## Recall 0.9677 0.95000 1.0000 1.0000
## F1 0.9524 0.97436 0.9565 1.0000
## Prevalence 0.1435 0.09259 0.1019 0.1157
## Detection Rate 0.1389 0.08796 0.1019 0.1157
## Detection Prevalence 0.1481 0.08796 0.1111 0.1157
## Balanced Accuracy 0.9785 0.97500 0.9948 1.0000
From the matrix summary, it is quite evident that are stacked ensemble showcased exceptional results in terms of accuracy and kappa metrics (~98% and ~97%).
# custom functio to print important metrics of our analyses
evalMetrics <- function(confMat){
# metrics-1
print("The accuracy and kappa of the Naive Bayes model are")
print(round(confMat$overall[1:2] * 100,2))
# metrics-2
print("The precision, recall and F1-score for target classes are")
print(confMat$byClass[c(1,2,5,6),c(5,6,7)])
}
# model-1: Naive Bayes
evalMetrics(confMat = NB.cm.test)
## [1] "The accuracy and kappa of the Naive Bayes model are"
## Accuracy Kappa
## 81.02 78.23
## [1] "The precision, recall and F1-score for target classes are"
## Precision Recall F1
## Class: c-CS-m 0.7714286 0.7297297 0.7500000
## Class: c-CS-s 0.5517241 0.6956522 0.6153846
## Class: t-CS-m 0.7241379 0.6774194 0.7000000
## Class: t-CS-s 0.7777778 0.7000000 0.7368421
# model-3: Multi-nomial Logistic Regression
evalMetrics(confMat = MGR.cm.test)
## [1] "The accuracy and kappa of the Naive Bayes model are"
## Accuracy Kappa
## 89.81 88.32
## [1] "The precision, recall and F1-score for target classes are"
## Precision Recall F1
## Class: c-CS-m 0.8787879 0.7837838 0.8285714
## Class: c-CS-s 0.8260870 0.8260870 0.8260870
## Class: t-CS-m 0.8709677 0.8709677 0.8709677
## Class: t-CS-s 0.9047619 0.9500000 0.9268293
# model-5: Bagging
evalMetrics(confMat = bag.cm.test)
## [1] "The accuracy and kappa of the Naive Bayes model are"
## Accuracy Kappa
## 94.44 93.61
## [1] "The precision, recall and F1-score for target classes are"
## Precision Recall F1
## Class: c-CS-m 0.9459459 0.9459459 0.9459459
## Class: c-CS-s 0.8800000 0.9565217 0.9166667
## Class: t-CS-m 0.9375000 0.9677419 0.9523810
## Class: t-CS-s 1.0000000 0.8000000 0.8888889
Comparing our base models over the test set, it appears that multinomial logistic regression has surpassed all the other models with respect to accuracy and kappa of ~91% and ~90%.
For other metrics, more importantly, if we check the class t-CS-m & t-CS-s indicating Rescued learning and Failed learning, we observe again that precision, recall, and F1-score of MGR model is beating other model values.
Although, when we compare MGR to Bagging, we see altogether different results. It seems logical as bagging being an ensemble tree model which fits much better to classification data.
Furthermore, we can also firmly say that our Stacked Ensemble Super learner has performed sub-par from all the other models in all metrics and even beating the built-in ensemble model -bagging.
As our first step for evaluation, we plot ROC curve along the respective AUC values for the target sub-classes that define three major learning outcomes i.e. normal learning, rescued learning, failed learning
# plotting the classes that are significant to our learning outcomes
# normal learning: "c-CS-m"
rocModels <- function(pred, test){
roc.pred <- as.ordered(pred)
# get roc results for every class label
res<- pROC::multiclass.roc(test$Class, roc.pred, quiet = T,
levels = c("c-CS-m", "c-CS-s", "c-SC-m", "c-SC-s",
"t-CS-m", "t-CS-s", "t-SC-m", "t-SC-s"))
plot.roc(res$rocs[[1]],
print.auc=T,
legacy.axes = T, main="AUC plot")
# normal learning: "c-CS-s"
plot.roc(res$rocs[[2]],
add=T, col = 'red',
print.auc = T,
legacy.axes = T,
print.auc.adj = c(0,3))
# rescued learning: "t-CS-m"
plot.roc(res$rocs[[4]],add=T, col = 'blue',
print.auc=T,
legacy.axes = T,
print.auc.adj = c(0,5))
# failed learning: "t-CS-s"
plot.roc(res$rocs[[5]],
add=T, col = 'green',
print.auc = T,
legacy.axes = T,
print.auc.adj = c(0,7))
legend('bottomright',
legend = c('c-CS-m: NL-1','c-CS-s: NL-2',
't-CS-m: RL', 't-CS-s: FL'),
col=c('black','red','blue', 'green'),lwd=2)
}
# Plot AUC plots for all models
# model-1: Naive Bayes
rocModels(pred=NB.pred.test, test=testX)
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
# model-2: Artificial Neural Network
rocModels(pred=unlist(NN.tablePred[1]), test = testX)
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
# model-3: Multi-nomial Logistic Regression
rocModels(pred=MGR.pred.test, test = testX)
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
# model-4: Stacked Ensemble Learner
rocModels(pred=unlist(stackResults[2]), test = testX)
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
# model-5: Bagging
rocModels(pred=bag.pred, test = testX)
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.
## Warning in value[[3L]](cond): Ordered predictor converted to numeric vector.
## Threshold values will not correspond to values in predictor.